# Packages ----------------------------------------------------------------
library(dplyr)
library(ggplot2)

# Data --------------------------------------------------------------------
# Read the raw data, drop rows with a missing state, normalise two state
# codes, and keep only the seven states of interest together with the
# per-candidate `ppl_pred_state_*` columns.
#
# NOTE(review): the recoded values ("AL", "RI") are not among the states kept
# by the filter below, so the recode does not change which rows survive; it is
# retained in case the state list is extended.
# NOTE(review): `X` is presumably the row-index column that read.csv() creates
# for an unnamed first column -- confirm against data_paper.csv.
data <- read.csv("data_paper.csv") %>%
  tidyr::drop_na(state) %>%
  mutate(
    # One type-stable recode instead of two stacked ifelse() calls; the
    # fallback branch keeps every other code as a character string.
    state = case_when(
      state == "AB" ~ "AL",
      state == "RH" ~ "RI",
      TRUE ~ as.character(state)
    )
  ) %>%
  filter(state %in% c("FL", "OH", "NC", "GA", "VA", "PA", "MI")) %>%
  select(X, state, starts_with("ppl_pred_state"))

# Forecasted vote shares, one row per state x candidate x statistic.
#
# Steps:
#   1. Reshape the `ppl_pred_state_<candidate>` columns to long format
#      (`candidate`, `likelihood`).
#   2. Per state and candidate, compute the mean ("Average") and the median
#      ("Median") of `likelihood`, ignoring missing values.
#   3. Reshape the two statistics to long format (`stat`, `likelihood`).
#   4. Within each state and statistic, divide each candidate's value by the
#      sum over candidates to obtain `estimated_vote_share`.
#
# The result is returned ungrouped so later steps do not silently inherit the
# (state, stat) grouping.
forecasted_vote_shares <- data %>%
  tidyr::pivot_longer(
    cols = starts_with("ppl_pred_state"),
    names_to = "candidate",
    values_to = "likelihood",
    names_prefix = "ppl_pred_state_"
  ) %>%
  group_by(state, candidate) %>%
  summarise(
    likelihood_Average = mean(likelihood, na.rm = TRUE),
    likelihood_Median = median(likelihood, na.rm = TRUE),
    # Drop the grouping explicitly rather than relying on summarise()'s
    # default of peeling off the last grouping variable.
    .groups = "drop"
  ) %>%
  tidyr::pivot_longer(
    cols = c(likelihood_Average, likelihood_Median),
    names_to = "stat",
    values_to = "likelihood",
    names_prefix = "likelihood_"
  ) %>%
  group_by(state, stat) %>%
  mutate(estimated_vote_share = likelihood / sum(likelihood)) %>%
  ungroup()

# Number of rows of `data` per state (columns: `state`, `n`).
n_by_state <- data %>%
  group_by(state) %>%
  tally(name = "n")

# Colour palette keyed by candidate name; passed as `values` to the manual
# fill and colour scales of the plot, which match entries by name.
colors <- c(
  biden  = "#0076CE",
  trump  = "#FF0000",
  middle = "grey45",
  rfk    = "#FFD700"
)

# Faceted bar chart: one panel per state, x = summary statistic
# (Average / Median), bars dodged by candidate, with the rounded share printed
# on each bar.
forecasted_vote_shares %>%
  ungroup() %>%
  # `n` is attached here but is not mapped to any aesthetic below.
  left_join(n_by_state, by = "state") %>%
  ggplot(mapping = aes(x = stat, y = estimated_vote_share)) +
  # Semi-transparent bars without an outline, one per candidate.
  geom_col(
    mapping = aes(fill = candidate, group = candidate),
    position = position_dodge(width = 0.8),
    width = 0.8,
    alpha = 0.35,
    colour = NA
  ) +
  # Value labels, placed 0.1 below the top of each bar and dodged with the
  # same width as the bars so they line up.
  geom_text(
    mapping = aes(
      y = estimated_vote_share - 0.1,
      label = round(estimated_vote_share, 2),
      colour = candidate
    ),
    position = position_dodge(width = 0.8),
    size = 4,
    show.legend = FALSE
  ) +
  facet_wrap(facets = ~state) +
  scale_fill_manual(
    values = colors,
    breaks = c("trump", "biden"),
    labels = c("Trump", "Biden")
  ) +
  scale_colour_manual(values = colors) +
  # Fixed y range with no padding, so bars sit directly on the x axis.
  scale_y_continuous(
    name = "Forecasted State\nVote Share\n",
    limits = c(0, 0.85),
    expand = c(0, 0)
  ) +
  labs(caption = "The forecasted vote share is computed by dividing the central likelihood for\n   a candidate by the sum of the central likelihoods of all candidates.\nThe central likelihood is calculated using the average or the median.") +
  theme(
    legend.position = "none",
    axis.title.x = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.background = element_rect(fill = NA, colour = "grey90"),
    strip.background.x = element_rect(fill = "grey90", colour = NA),
    strip.text.x = element_text(size = 13)
  )

# Write the most recently created plot to disk (ggsave() uses the last plot
# when none is supplied); size is given in inches.
ggsave(
  filename = "fig2_forecasted_vote_share_swing_states.png",
  width = 9,
  height = 5.5,
  units = "in",
  dpi = 300
)
